Simulations

time = proc.time()
seed = 3
s = sim_phyl(seed=3)
s2 = s$newick.extant.p
EM = EM_phylo(wt=s2$wt, init_par = c(1.8,0.6,80),n_trees = 10, parallel = F, impsam = T,tol=0.01)
## [1] "iteration # 1 :"
## [1]  1.8  0.6 80.0
## [1] "iteration # 2 :"
## [1]  1.4842506  0.5207339 85.0305292
## [1] "iteration # 3 :"
## [1]  1.2192968  0.5125791 84.6833348
## [1] "iteration # 4 :"
## [1]  1.1936977  0.4731993 86.3697085
## [1] "iteration # 5 :"
## [1]  1.068568  0.423197 81.356096
## [1] "iteration # 6 :"
## [1]  1.0022360  0.3461626 76.2385238
## [1] "iteration # 7 :"
## [1]  0.7566864  0.3218578 74.8233132
## [1] "iteration # 8 :"
## [1]  0.7189199  0.2733859 70.7821735
## [1] "iteration # 9 :"
## [1]  0.6959333  0.2312761 65.4365753
## [1] "iteration # 10 :"
## [1]  0.6853191  0.1933207 56.7717044
## [1] "iteration # 11 :"
## [1]  0.6580751  0.1397195 51.9871795
## [1] "iteration # 12 :"
## [1]  0.7782279  0.1049570 46.7171976
## [1] "iteration # 13 :"
## [1]  0.55532981  0.09431441 43.99874741
## [1] "iteration # 14 :"
## [1]  0.58056037  0.07290122 41.97261890
## [1] "iteration # 15 :"
## [1]  0.70374226  0.06355204 38.20742836
## [1] "iteration # 16 :"
## [1]  0.65611372  0.04316876 36.44853944
## [1] "iteration # 17 :"
## [1]  0.50578122  0.02898578 37.28834490
## [1] "iteration # 18 :"
## [1]  0.41548191  0.02073383 39.29751273
## [1] "iteration # 19 :"
## [1]  0.3985476  0.0106999 39.5556110
## [1] "iteration # 20 :"
## [1]  0.371649626  0.003713163 41.040860097
## [1] "iteration # 21 :"
## [1]  0.352464908  0.000857078 41.649117144
## [1] "iteration # 22 :"
## [1] 3.485289e-01 7.682937e-06 4.184807e+01
## [1] "iteration # 23 :"
## [1] 3.485040e-01 7.407260e-17 4.184898e+01
## [1] "iteration # 24 :"
## [1] 3.485039e-01 3.361365e-17 4.184899e+01
## [1] "iteration # 25 :"
## [1] 3.485039e-01 8.980709e-18 4.184899e+01
## [1] "iteration # 26 :"
## [1] 3.485038e-01 1.050390e-18 4.184899e+01
## [1] "iteration # 27 :"
## [1] 3.485038e-01 1.050390e-18 4.184899e+01
## [1] "iteration # 28 :"
## [1] 3.485038e-01 1.050390e-18 4.184899e+01
## [1] "iteration # 29 :"
## [1] 3.485038e-01 1.050390e-18 4.184899e+01
## [1] "iteration # 30 :"
## [1] 3.485038e-01 1.050390e-18 4.184899e+01
print(proc.time()-time)
##    user  system elapsed 
##  17.352   0.004  17.358
qplot(1:30,EM$pars[,2])

EM$pars
##            [,1]         [,2]     [,3]
##  [1,] 1.6826445 5.131967e-01 84.49220
##  [2,] 1.2881443 4.833367e-01 83.75733
##  [3,] 1.1303295 4.215241e-01 84.25198
##  [4,] 1.0615753 4.011336e-01 79.57020
##  [5,] 1.0438218 3.842778e-01 75.84205
##  [6,] 0.8454106 3.229822e-01 81.24672
##  [7,] 0.7304166 3.019546e-01 72.99091
##  [8,] 0.9039366 2.730956e-01 66.81371
##  [9,] 0.8543810 2.379271e-01 64.55642
## [10,] 0.7998869 2.063764e-01 55.54143
## [11,] 0.7763548 1.729484e-01 52.05320
## [12,] 0.8177746 1.536267e-01 46.29201
## [13,] 0.8366668 1.155314e-01 44.81956
## [14,] 0.6681238 8.438512e-02 44.55862
## [15,] 0.7038666 8.490554e-02 38.69240
## [16,] 0.5882912 4.809249e-02 38.56652
## [17,] 0.5153125 4.011941e-02 37.32253
## [18,] 0.4897349 2.787306e-02 37.44175
## [19,] 0.4066526 1.277044e-02 39.18647
## [20,] 0.3748614 4.705835e-03 40.49155
## [21,] 0.3502617 4.478507e-04 41.74687
## [22,] 0.3485157 2.887696e-06 41.84835
## [23,] 0.3485038 4.594623e-17 41.84900
## [24,] 0.3485038 7.845184e-18 41.84900
## [25,] 0.3485038 7.845184e-18 41.84900
## [26,] 0.3485038 7.845184e-18 41.84900
## [27,] 0.3485038 7.845184e-18 41.84900
## [28,] 0.3485038 7.845184e-18 41.84900
## [29,] 0.3485038 7.845184e-18 41.84900
## [30,] 0.3485038 7.845184e-18 41.84900
time = proc.time()
stat=NULL
phylo1 = s$newick.extant
for(i in 1:30){
  expe = expectedLTT2(pars=EM$pars[i,])
  wt = c(expe$bt[1],diff(expe$bt))
  p = list(wt=wt,E=rep(1,length(expe$bt)),n=expe$Ex)
  phylo2 = p2phylo(p)
  ltt = ltt_stat(phylo1,phylo2)
  stat[i] = ltt
}
print(proc.time()-time)
##    user  system elapsed 
## 109.712   0.028 109.873
 qplot(1:30,stat)

what if fo the expected thing with 100 trees?

time = proc.time()
stat=NULL
for(i in 1:30){
  expe = expectedLTT2(pars=EM$pars[i,],n_it=100)
  wt = c(expe$bt[1],diff(expe$bt))
  p = list(wt=wt,E=rep(1,(length(expe$bt)-1)),n=expe$Ex)
  phylo2 = p2phylo(p)
  ltt = ltt_stat(phylo1,phylo2)
  stat[i] = ltt
}
print(proc.time()-time)
##     user   system  elapsed 
## 1088.716    0.168 1089.881
 qplot(1:30,stat)

wait, that was with importance sampling, now without that

n_it=70
time = proc.time()
EM = EM_phylo(wt=s2$wt, init_par = c(1.8,0.6,80),n_trees = 10, parallel = F, impsam = F,tol=0.01, n_it=n_it)
## [1] "iteration # 1 :"
## [1]  1.8  0.6 80.0
## [1] "iteration # 2 :"
## [1]  1.4687259  0.5109511 83.9367905
## [1] "iteration # 3 :"
## [1]  1.2354543  0.4524527 85.9787185
## [1] "iteration # 4 :"
## [1]  1.0885570  0.4125031 83.7872732
## [1] "iteration # 5 :"
## [1]  0.9940949  0.3743226 83.4062401
## [1] "iteration # 6 :"
## [1]  0.9299009  0.3437404 80.6332006
## [1] "iteration # 7 :"
## [1]  0.8770352  0.3098066 76.2143483
## [1] "iteration # 8 :"
## [1]  0.8097597  0.2776245 69.8307526
## [1] "iteration # 9 :"
## [1]  0.7881551  0.2528049 65.8456808
## [1] "iteration # 10 :"
## [1]  0.7974545  0.2343711 63.8426577
## [1] "iteration # 11 :"
## [1]  0.8096807  0.2184623 59.9496685
## [1] "iteration # 12 :"
## [1]  0.7963039  0.1963645 57.0823244
## [1] "iteration # 13 :"
## [1]  0.7389314  0.1773784 55.0472440
## [1] "iteration # 14 :"
## [1]  0.7641483  0.1617025 50.7431973
## [1] "iteration # 15 :"
## [1]  0.7741984  0.1440097 49.2333581
## [1] "iteration # 16 :"
## [1]  0.6836012  0.1295144 48.5280765
## [1] "iteration # 17 :"
## [1]  0.6978149  0.1226944 45.7937147
## [1] "iteration # 18 :"
## [1]  0.7313179  0.1095122 43.8770433
## [1] "iteration # 19 :"
## [1]  0.8724892  0.1003700 41.6371633
## [1] "iteration # 20 :"
## [1]  0.82028540  0.09371596 40.91410866
## [1] "iteration # 21 :"
## [1]  0.76927161  0.08361124 40.59825246
## [1] "iteration # 22 :"
## [1]  0.67135728  0.07663422 40.05942278
## [1] "iteration # 23 :"
## [1]  0.77832781  0.06553154 37.84457077
## [1] "iteration # 24 :"
## [1]  0.78770345  0.06545125 37.74017470
## [1] "iteration # 25 :"
## [1]  0.76422907  0.06202151 37.93118475
## [1] "iteration # 26 :"
## [1]  0.74606037  0.06077505 37.93305216
## [1] "iteration # 27 :"
## [1]  0.71416559  0.05624148 38.36826827
## [1] "iteration # 28 :"
## [1]  0.7072428  0.0582574 37.6734186
## [1] "iteration # 29 :"
## [1]  0.72305926  0.06010951 37.81696416
## [1] "iteration # 30 :"
## [1]  0.76956648  0.06043783 37.48630953
## [1] "iteration # 31 :"
## [1]  0.73362660  0.05663433 37.23176318
## [1] "iteration # 32 :"
## [1]  0.72083824  0.05583536 37.11367041
## [1] "iteration # 33 :"
## [1]  0.68886747  0.05150015 36.88106284
## [1] "iteration # 34 :"
## [1]  0.7127205  0.0506989 36.7601088
## [1] "iteration # 35 :"
## [1]  0.64434903  0.04746435 37.03155861
## [1] "iteration # 36 :"
## [1]  0.64740184  0.04224881 37.01371103
## [1] "iteration # 37 :"
## [1]  0.67716329  0.04802233 36.90074846
## [1] "iteration # 38 :"
## [1]  0.69207295  0.05004281 36.45681273
## [1] "iteration # 39 :"
## [1]  0.63378296  0.04739888 37.52700969
## [1] "iteration # 40 :"
## [1]  0.62858848  0.04502988 36.83182319
## [1] "iteration # 41 :"
## [1]  0.62849968  0.04352506 36.93523458
## [1] "iteration # 42 :"
## [1]  0.65642924  0.04711901 36.75218388
## [1] "iteration # 43 :"
## [1]  0.68779618  0.04913433 36.31085789
## [1] "iteration # 44 :"
## [1]  0.71830797  0.04843048 36.43859463
## [1] "iteration # 45 :"
## [1]  0.66533796  0.04548733 37.11412500
## [1] "iteration # 46 :"
## [1]  0.65688384  0.04301202 37.31367248
## [1] "iteration # 47 :"
## [1]  0.64390148  0.04234054 36.99772508
## [1] "iteration # 48 :"
## [1]  0.5770895  0.0412872 37.2369383
## [1] "iteration # 49 :"
## [1]  0.5353054  0.0376977 37.1807887
## [1] "iteration # 50 :"
## [1]  0.52022136  0.03166425 37.82516736
## [1] "iteration # 51 :"
## [1]  0.50451690  0.03076214 37.82028853
## [1] "iteration # 52 :"
## [1]  0.52485663  0.03251099 37.47260283
## [1] "iteration # 53 :"
## [1]  0.50059051  0.02932463 37.66703467
## [1] "iteration # 54 :"
## [1]  0.46773086  0.02303666 38.38598078
## [1] "iteration # 55 :"
## [1]  0.45769681  0.02058162 38.26449341
## [1] "iteration # 56 :"
## [1]  0.44438411  0.01941827 38.80611765
## [1] "iteration # 57 :"
## [1]  0.42232536  0.01766626 39.09161329
## [1] "iteration # 58 :"
## [1]  0.41664523  0.01661823 39.19990684
## [1] "iteration # 59 :"
## [1]  0.40629754  0.01494541 39.57194186
## [1] "iteration # 60 :"
## [1]  0.39844771  0.01135293 39.99976033
## [1] "iteration # 61 :"
## [1]  0.40288079  0.01016386 39.72117476
## [1] "iteration # 62 :"
## [1]  0.387260049  0.008882784 40.147021035
## [1] "iteration # 63 :"
## [1]  0.382414898  0.007965389 40.322196845
## [1] "iteration # 64 :"
## [1]  0.386174000  0.007500892 40.201903034
## [1] "iteration # 65 :"
## [1]  0.394299336  0.009228552 39.879275466
## [1] "iteration # 66 :"
## [1]  0.367316760  0.005084478 40.922945984
## [1] "iteration # 67 :"
## [1]  0.371033192  0.005091513 40.704360899
## [1] "iteration # 68 :"
## [1]  0.36878164  0.00538057 40.93377223
## [1] "iteration # 69 :"
## [1]  0.378192961  0.005741875 40.580185778
## [1] "iteration # 70 :"
## [1]  0.371278103  0.005439656 40.661981893
print(proc.time()-time)
##    user  system elapsed 
##  20.684   0.004  20.699
qplot(1:n_it,EM$pars[,2])

stat=NULL
time=proc.time()
for(i in 1:n_it){
  expe = expectedLTT2(pars=EM$pars[i,])
  wt = c(expe$bt[1],diff(expe$bt))
  p = list(wt=wt,E=rep(1,length(expe$bt)),n=expe$Ex)
  phylo2 = p2phylo(p)
  ltt = ltt_stat(phylo1,phylo2)
  stat[i] = ltt
}
print(proc.time()-time)
##    user  system elapsed 
## 174.948   0.024 175.159
 qplot(1:n_it,stat)

I am going to do the same but with 100 trees:

nice, I want to check for another tree

seed = 2
s = sim_phyl(seed = seed)
s2 = s$newick.extant.p
time=proc.time()
EM = EM_phylo(wt=s2$wt, init_par = c(1.8,0.6,80),n_trees = 10, parallel = T, impsam = T,tol=0.01)
## [1] "iteration # 1 :"
## [1]  1.8  0.6 80.0
## [1] "iteration # 2 :"
## [1]  1.3461830  0.5465302 81.5614811
## [1] "iteration # 3 :"
## [1]  1.2138246  0.5073566 76.1795179
## [1] "iteration # 4 :"
## [1]  1.0033049  0.4994802 78.9430244
## [1] "iteration # 5 :"
## [1]  0.8293162  0.4597334 81.6740216
## [1] "iteration # 6 :"
## [1]  0.7893348  0.4239375 69.3908200
## [1] "iteration # 7 :"
## [1]  0.7346098  0.3742524 65.9291583
## [1] "iteration # 8 :"
## [1]  0.8155829  0.3352657 55.9614470
## [1] "iteration # 9 :"
## [1]  0.8186718  0.2832650 50.2406139
## [1] "iteration # 10 :"
## [1]  0.8521416  0.2551442 46.9344381
## [1] "iteration # 11 :"
## [1]  0.7696572  0.1996186 46.3053002
## [1] "iteration # 12 :"
## [1]  0.8218108  0.1893510 43.4042169
## [1] "iteration # 13 :"
## [1]  0.7015924  0.1525135 41.6455866
## [1] "iteration # 14 :"
## [1]  0.5882046  0.1145913 38.5167894
## [1] "iteration # 15 :"
## [1]  0.46028620  0.07810912 37.74753205
## [1] "iteration # 16 :"
## [1]  0.38635507  0.06546688 38.79687303
## [1] "iteration # 17 :"
## [1]  0.31732085  0.03638115 49.25900691
## [1] "iteration # 18 :"
## [1]  0.25961305  0.01965025 64.26064281
## [1] "iteration # 19 :"
## [1]  0.232889866  0.007452604 82.856482212
## [1] "iteration # 20 :"
## [1]  0.23489907  0.00554337 77.17070133
## [1] "iteration # 21 :"
## [1]  0.235077077  0.005236152 79.031905645
## [1] "iteration # 22 :"
## [1] 2.226448e-01 4.626761e-05 9.216272e+01
## [1] "iteration # 23 :"
## [1] 2.225562e-01 8.803887e-18 9.225888e+01
## [1] "iteration # 24 :"
## [1] 2.225562e-01 8.803887e-18 9.225888e+01
## [1] "iteration # 25 :"
## [1] 2.225562e-01 8.803887e-18 9.225888e+01
## [1] "iteration # 26 :"
## [1] 2.225562e-01 8.803887e-18 9.225888e+01
## [1] "iteration # 27 :"
## [1] 2.225562e-01 8.803887e-18 9.225888e+01
## [1] "iteration # 28 :"
## [1] 2.225562e-01 8.803887e-18 9.225888e+01
## [1] "iteration # 29 :"
## [1] 2.225562e-01 8.803887e-18 9.225888e+01
## [1] "iteration # 30 :"
## [1] 2.225562e-01 8.803887e-18 9.225888e+01
print(proc.time()-time)
##    user  system elapsed 
##  16.896   0.004  16.909
qplot(1:30,EM$pars[,2])

EM$pars
##            [,1]         [,2]     [,3]
##  [1,] 1.3461830 5.465302e-01 81.56148
##  [2,] 1.2138246 5.073566e-01 76.17952
##  [3,] 1.0033049 4.994802e-01 78.94302
##  [4,] 0.8293162 4.597334e-01 81.67402
##  [5,] 0.7893348 4.239375e-01 69.39082
##  [6,] 0.7346098 3.742524e-01 65.92916
##  [7,] 0.8155829 3.352657e-01 55.96145
##  [8,] 0.8186718 2.832650e-01 50.24061
##  [9,] 0.8521416 2.551442e-01 46.93444
## [10,] 0.7696572 1.996186e-01 46.30530
## [11,] 0.8218108 1.893510e-01 43.40422
## [12,] 0.7015924 1.525135e-01 41.64559
## [13,] 0.5882046 1.145913e-01 38.51679
## [14,] 0.4602862 7.810912e-02 37.74753
## [15,] 0.3863551 6.546688e-02 38.79687
## [16,] 0.3173208 3.638115e-02 49.25901
## [17,] 0.2596130 1.965025e-02 64.26064
## [18,] 0.2328899 7.452604e-03 82.85648
## [19,] 0.2348991 5.543370e-03 77.17070
## [20,] 0.2350771 5.236152e-03 79.03191
## [21,] 0.2226448 4.626761e-05 92.16272
## [22,] 0.2225562 8.803887e-18 92.25888
## [23,] 0.2225562 8.803887e-18 92.25888
## [24,] 0.2225562 8.803887e-18 92.25888
## [25,] 0.2225562 8.803887e-18 92.25888
## [26,] 0.2225562 8.803887e-18 92.25888
## [27,] 0.2225562 8.803887e-18 92.25888
## [28,] 0.2225562 8.803887e-18 92.25888
## [29,] 0.2225562 8.803887e-18 92.25888
## [30,] 0.2225562 8.803887e-18 92.25888
stat=NULL
time=proc.time()
phylo1 = s$newick.extant
for(i in 1:30){
  expe = expectedLTT2(pars=EM$pars[i,])
  wt = c(expe$bt[1],diff(expe$bt))
  p = list(wt=wt,E=rep(1,length(expe$bt)),n=expe$Ex)
  phylo2 = p2phylo(p)
  ltt = ltt_stat(phylo1,phylo2)
  stat[i] = ltt
}
print(proc.time()-time)
##    user  system elapsed 
##  88.312   0.016  88.491
 qplot(1:30,stat)


Complete simulations with method 1 (with the fasted way (less accuracy))

The algorithm corresponds to

time = proc.time()
n_sim = 100 #number of simulated trees
n_it = 30 # number of EM iterations
Pars = matrix(nrow=n_sim,ncol=3)
times = vector(mode='numeric',length = n_sim) 
for(j in 1:n_sim){
  s <- sim_phyl(seed=j)
  s2 = s$newick.extant.p
  EM = EM_phylo(wt=s2$wt, init_par = c(1.8,0.6,80), n_trees = 10, parallel = F, impsam = T, printpar = F, n_it=n_it)
  stat = vector(mode='numeric',length = n_it) 
  phylo1 = s$newick.extant
  for(i in 1:30){
    expe = expectedLTT2(pars=EM$pars[i,])
    wt = c(expe$bt[1],diff(expe$bt))
    p = list(wt=wt,E=rep(1,length(expe$bt)),n=expe$Ex)
    phylo2 = p2phylo(p)
    ltt = ltt_stat(phylo1,phylo2)
    stat[i] = ltt
  }
  Pars[j,] = EM$pars[stat==min(stat),]
  times[j] = sum(EM$times)
  qplot(1:30,stat)
}
print(proc.time()-time)
##      user    system   elapsed 
## 11382.232     0.928 11386.495
summary(Pars)
##        V1               V2                V3       
##  Min.   :0.3959   Min.   :0.01203   Min.   :33.59  
##  1st Qu.:0.5884   1st Qu.:0.05669   1st Qu.:39.58  
##  Median :0.6981   Median :0.09055   Median :42.65  
##  Mean   :0.6990   Mean   :0.10257   Mean   :43.93  
##  3rd Qu.:0.7669   3rd Qu.:0.11388   3rd Qu.:46.25  
##  Max.   :1.4188   Max.   :0.60123   Max.   :83.32
qplot(Pars[,1], geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

qplot(Pars[,2], geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

qplot(Pars[,3], geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

summary(time)
##    user  system elapsed 
##   2.496   0.068   2.544
qplot(times, geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

time = proc.time()
n_sim = 100 #number of simulated trees
n_it = 100 # number of EM iterations
Pars = matrix(nrow=n_sim,ncol=3)
times = vector(mode='numeric',length = n_sim) 
EMs = vector(mode='list',length = n_sim) 
LTTs = vector(mode='list',length = n_sim)
for(j in 1:n_sim){
  s <- sim_phyl(seed=j)
  s2 = s$newick.extant.p
  EM = EM_phylo(wt=s2$wt, init_par = c(1.8,0.6,80), n_trees = 10, parallel = F, impsam = F, printpar = F, n_it=n_it)
  stat = vector(mode='numeric',length = n_it) 
  phylo1 = s$newick.extant
  for(i in 1:n_it){
    expe = expectedLTT2(pars=EM$pars[i,])
    wt = c(expe$bt[1],diff(expe$bt))
    p = list(wt=wt,E=rep(1,length(expe$bt)),n=expe$Ex)
    phylo2 = p2phylo(p)
    ltt = ltt_stat(phylo1,phylo2)
    stat[i] = ltt
  }
  EMs[[j]] = EM$pars
  LTTs[[j]] = stat
  Pars[j,] = EM$pars[stat==min(stat),]
  times[j] = sum(EM$times)
  print(qplot(1:n_it,stat))
}

print(proc.time()-time)
##      user    system   elapsed 
## 24326.220     1.272 24634.366
summary(Pars)
##        V1               V2                V3       
##  Min.   :0.4762   Min.   :0.02977   Min.   :33.49  
##  1st Qu.:0.6773   1st Qu.:0.07093   1st Qu.:39.26  
##  Median :0.7659   Median :0.09063   Median :41.46  
##  Mean   :0.7830   Mean   :0.10130   Mean   :41.85  
##  3rd Qu.:0.8831   3rd Qu.:0.11238   3rd Qu.:44.00  
##  Max.   :1.2563   Max.   :0.59881   Max.   :66.73
qplot(Pars[,1], geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

qplot(Pars[,2], geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

qplot(Pars[,3], geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

summary(time)
##    user  system elapsed 
##   2.540   0.064   2.584
qplot(times, geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

for(j in 1:n_sim){
  EM=EMs[[j]]
  print(paste('´mu=0.1','seed',j))
  df = data.frame(it=1:n_it, EM=EM[,2],ltt=LTTs[[j]])
  print(EM.plot(df))
  #p = ggplot(data=df,aes(x=it,y=EM,color=ltt))+geom_point()
  #print(p)
}
## [1] "´mu=0.1 seed 1"

## NULL
## [1] "´mu=0.1 seed 2"

## NULL
## [1] "´mu=0.1 seed 3"

## NULL
## [1] "´mu=0.1 seed 4"

## NULL
## [1] "´mu=0.1 seed 5"

## NULL
## [1] "´mu=0.1 seed 6"

## NULL
## [1] "´mu=0.1 seed 7"

## NULL
## [1] "´mu=0.1 seed 8"

## NULL
## [1] "´mu=0.1 seed 9"

## NULL
## [1] "´mu=0.1 seed 10"

## NULL
## [1] "´mu=0.1 seed 11"

## NULL
## [1] "´mu=0.1 seed 12"

## NULL
## [1] "´mu=0.1 seed 13"

## NULL
## [1] "´mu=0.1 seed 14"

## NULL
## [1] "´mu=0.1 seed 15"

## NULL
## [1] "´mu=0.1 seed 16"

## NULL
## [1] "´mu=0.1 seed 17"

## NULL
## [1] "´mu=0.1 seed 18"

## NULL
## [1] "´mu=0.1 seed 19"

## NULL
## [1] "´mu=0.1 seed 20"

## NULL
## [1] "´mu=0.1 seed 21"

## NULL
## [1] "´mu=0.1 seed 22"

## NULL
## [1] "´mu=0.1 seed 23"

## NULL
## [1] "´mu=0.1 seed 24"

## NULL
## [1] "´mu=0.1 seed 25"

## NULL
## [1] "´mu=0.1 seed 26"

## NULL
## [1] "´mu=0.1 seed 27"

## NULL
## [1] "´mu=0.1 seed 28"

## NULL
## [1] "´mu=0.1 seed 29"

## NULL
## [1] "´mu=0.1 seed 30"

## NULL
## [1] "´mu=0.1 seed 31"

## NULL
## [1] "´mu=0.1 seed 32"

## NULL
## [1] "´mu=0.1 seed 33"

## NULL
## [1] "´mu=0.1 seed 34"

## NULL
## [1] "´mu=0.1 seed 35"

## NULL
## [1] "´mu=0.1 seed 36"

## NULL
## [1] "´mu=0.1 seed 37"

## NULL
## [1] "´mu=0.1 seed 38"

## NULL
## [1] "´mu=0.1 seed 39"

## NULL
## [1] "´mu=0.1 seed 40"

## NULL
## [1] "´mu=0.1 seed 41"

## NULL
## [1] "´mu=0.1 seed 42"

## NULL
## [1] "´mu=0.1 seed 43"

## NULL
## [1] "´mu=0.1 seed 44"

## NULL
## [1] "´mu=0.1 seed 45"

## NULL
## [1] "´mu=0.1 seed 46"

## NULL
## [1] "´mu=0.1 seed 47"

## NULL
## [1] "´mu=0.1 seed 48"

## NULL
## [1] "´mu=0.1 seed 49"

## NULL
## [1] "´mu=0.1 seed 50"

## NULL
## [1] "´mu=0.1 seed 51"

## NULL
## [1] "´mu=0.1 seed 52"

## NULL
## [1] "´mu=0.1 seed 53"

## NULL
## [1] "´mu=0.1 seed 54"

## NULL
## [1] "´mu=0.1 seed 55"

## NULL
## [1] "´mu=0.1 seed 56"

## NULL
## [1] "´mu=0.1 seed 57"

## NULL
## [1] "´mu=0.1 seed 58"

## NULL
## [1] "´mu=0.1 seed 59"

## NULL
## [1] "´mu=0.1 seed 60"

## NULL
## [1] "´mu=0.1 seed 61"

## NULL
## [1] "´mu=0.1 seed 62"

## NULL
## [1] "´mu=0.1 seed 63"

## NULL
## [1] "´mu=0.1 seed 64"

## NULL
## [1] "´mu=0.1 seed 65"

## NULL
## [1] "´mu=0.1 seed 66"

## NULL
## [1] "´mu=0.1 seed 67"

## NULL
## [1] "´mu=0.1 seed 68"

## NULL
## [1] "´mu=0.1 seed 69"

## NULL
## [1] "´mu=0.1 seed 70"

## NULL
## [1] "´mu=0.1 seed 71"

## NULL
## [1] "´mu=0.1 seed 72"

## NULL
## [1] "´mu=0.1 seed 73"

## NULL
## [1] "´mu=0.1 seed 74"

## NULL
## [1] "´mu=0.1 seed 75"

## NULL
## [1] "´mu=0.1 seed 76"

## NULL
## [1] "´mu=0.1 seed 77"

## NULL
## [1] "´mu=0.1 seed 78"

## NULL
## [1] "´mu=0.1 seed 79"

## NULL
## [1] "´mu=0.1 seed 80"

## NULL
## [1] "´mu=0.1 seed 81"

## NULL
## [1] "´mu=0.1 seed 82"

## NULL
## [1] "´mu=0.1 seed 83"

## NULL
## [1] "´mu=0.1 seed 84"

## NULL
## [1] "´mu=0.1 seed 85"

## NULL
## [1] "´mu=0.1 seed 86"

## NULL
## [1] "´mu=0.1 seed 87"

## NULL
## [1] "´mu=0.1 seed 88"

## NULL
## [1] "´mu=0.1 seed 89"

## NULL
## [1] "´mu=0.1 seed 90"

## NULL
## [1] "´mu=0.1 seed 91"

## NULL
## [1] "´mu=0.1 seed 92"

## NULL
## [1] "´mu=0.1 seed 93"

## NULL
## [1] "´mu=0.1 seed 94"

## NULL
## [1] "´mu=0.1 seed 95"

## NULL
## [1] "´mu=0.1 seed 96"

## NULL
## [1] "´mu=0.1 seed 97"

## NULL
## [1] "´mu=0.1 seed 98"

## NULL
## [1] "´mu=0.1 seed 99"

## NULL
## [1] "´mu=0.1 seed 100"

## NULL
time = proc.time()
n_sim = 100 #number of simulated trees
n_it = 120 # number of EM iterations
Pars = matrix(nrow=n_sim,ncol=3)
times = vector(mode='numeric',length = n_sim) 
EMs = vector(mode='list',length = n_sim) 
LTTs = vector(mode='list',length = n_sim) 
for(j in 1:n_sim){
  s <- sim_phyl(seed=j,mu0 = 0.2)
  s2 = s$newick.extant.p
  EM = EM_phylo(wt=s2$wt, init_par = c(1.8,0.6,80), n_trees = 10, parallel = F, impsam = F, printpar = F, n_it=n_it)
  stat = vector(mode='numeric',length = n_it) 
  phylo1 = s$newick.extant
  for(i in 1:n_it){
    expe = expectedLTT2(pars=EM$pars[i,])
    wt = c(expe$bt[1],diff(expe$bt))
    p = list(wt=wt,E=rep(1,length(expe$bt)),n=expe$Ex)
    phylo2 = p2phylo(p)
    ltt = ltt_stat(phylo1,phylo2)
    stat[i] = ltt
  }
  EMs[[j]] = EM$pars
  LTTs[[j]] = stat
  Pars[j,] = EM$pars[stat==min(stat),]
  times[j] = sum(EM$times)
#  print(qplot(1:n_it,stat))
}
print(proc.time()-time)
##      user    system   elapsed 
## 31792.844     0.556 33716.456
summary(Pars)
##        V1               V2                V3       
##  Min.   :0.4647   Min.   :0.05325   Min.   :25.39  
##  1st Qu.:0.6499   1st Qu.:0.11239   1st Qu.:35.53  
##  Median :0.7395   Median :0.13715   Median :39.34  
##  Mean   :0.7631   Mean   :0.14133   Mean   :39.18  
##  3rd Qu.:0.8427   3rd Qu.:0.16089   3rd Qu.:42.54  
##  Max.   :1.2313   Max.   :0.40700   Max.   :52.53
qplot(Pars[,1], geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

qplot(Pars[,2], geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

qplot(Pars[,3], geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

summary(time)
##    user  system elapsed 
##   2.588   0.076   2.647
qplot(times, geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

for(j in 1:n_sim){
  EM=EMs[[j]]
  print(paste('´mu=0.2','seed',j))
  df = data.frame(it=1:n_it, EM=EM[,2],ltt=LTTs[[j]])
  print(EM.plot(df))
}
## [1] "´mu=0.2 seed 1"

## NULL
## [1] "´mu=0.2 seed 2"

## NULL
## [1] "´mu=0.2 seed 3"

## NULL
## [1] "´mu=0.2 seed 4"

## NULL
## [1] "´mu=0.2 seed 5"

## NULL
## [1] "´mu=0.2 seed 6"

## NULL
## [1] "´mu=0.2 seed 7"

## NULL
## [1] "´mu=0.2 seed 8"

## NULL
## [1] "´mu=0.2 seed 9"

## NULL
## [1] "´mu=0.2 seed 10"

## NULL
## [1] "´mu=0.2 seed 11"

## NULL
## [1] "´mu=0.2 seed 12"

## NULL
## [1] "´mu=0.2 seed 13"

## NULL
## [1] "´mu=0.2 seed 14"

## NULL
## [1] "´mu=0.2 seed 15"

## NULL
## [1] "´mu=0.2 seed 16"

## NULL
## [1] "´mu=0.2 seed 17"

## NULL
## [1] "´mu=0.2 seed 18"

## NULL
## [1] "´mu=0.2 seed 19"

## NULL
## [1] "´mu=0.2 seed 20"

## NULL
## [1] "´mu=0.2 seed 21"

## NULL
## [1] "´mu=0.2 seed 22"

## NULL
## [1] "´mu=0.2 seed 23"

## NULL
## [1] "´mu=0.2 seed 24"

## NULL
## [1] "´mu=0.2 seed 25"

## NULL
## [1] "´mu=0.2 seed 26"

## NULL
## [1] "´mu=0.2 seed 27"

## NULL
## [1] "´mu=0.2 seed 28"

## NULL
## [1] "´mu=0.2 seed 29"

## NULL
## [1] "´mu=0.2 seed 30"

## NULL
## [1] "´mu=0.2 seed 31"

## NULL
## [1] "´mu=0.2 seed 32"

## NULL
## [1] "´mu=0.2 seed 33"

## NULL
## [1] "´mu=0.2 seed 34"

## NULL
## [1] "´mu=0.2 seed 35"

## NULL
## [1] "´mu=0.2 seed 36"

## NULL
## [1] "´mu=0.2 seed 37"

## NULL
## [1] "´mu=0.2 seed 38"

## NULL
## [1] "´mu=0.2 seed 39"

## NULL
## [1] "´mu=0.2 seed 40"

## NULL
## [1] "´mu=0.2 seed 41"

## NULL
## [1] "´mu=0.2 seed 42"

## NULL
## [1] "´mu=0.2 seed 43"

## NULL
## [1] "´mu=0.2 seed 44"

## NULL
## [1] "´mu=0.2 seed 45"

## NULL
## [1] "´mu=0.2 seed 46"

## NULL
## [1] "´mu=0.2 seed 47"

## NULL
## [1] "´mu=0.2 seed 48"

## NULL
## [1] "´mu=0.2 seed 49"

## NULL
## [1] "´mu=0.2 seed 50"

## NULL
## [1] "´mu=0.2 seed 51"

## NULL
## [1] "´mu=0.2 seed 52"

## NULL
## [1] "´mu=0.2 seed 53"

## NULL
## [1] "´mu=0.2 seed 54"

## NULL
## [1] "´mu=0.2 seed 55"

## NULL
## [1] "´mu=0.2 seed 56"

## NULL
## [1] "´mu=0.2 seed 57"

## NULL
## [1] "´mu=0.2 seed 58"

## NULL
## [1] "´mu=0.2 seed 59"

## NULL
## [1] "´mu=0.2 seed 60"

## NULL
## [1] "´mu=0.2 seed 61"

## NULL
## [1] "´mu=0.2 seed 62"

## NULL
## [1] "´mu=0.2 seed 63"

## NULL
## [1] "´mu=0.2 seed 64"

## NULL
## [1] "´mu=0.2 seed 65"

## NULL
## [1] "´mu=0.2 seed 66"

## NULL
## [1] "´mu=0.2 seed 67"

## NULL
## [1] "´mu=0.2 seed 68"

## NULL
## [1] "´mu=0.2 seed 69"

## NULL
## [1] "´mu=0.2 seed 70"

## NULL
## [1] "´mu=0.2 seed 71"

## NULL
## [1] "´mu=0.2 seed 72"

## NULL
## [1] "´mu=0.2 seed 73"

## NULL
## [1] "´mu=0.2 seed 74"

## NULL
## [1] "´mu=0.2 seed 75"

## NULL
## [1] "´mu=0.2 seed 76"

## NULL
## [1] "´mu=0.2 seed 77"

## NULL
## [1] "´mu=0.2 seed 78"

## NULL
## [1] "´mu=0.2 seed 79"

## NULL
## [1] "´mu=0.2 seed 80"

## NULL
## [1] "´mu=0.2 seed 81"

## NULL
## [1] "´mu=0.2 seed 82"

## NULL
## [1] "´mu=0.2 seed 83"

## NULL
## [1] "´mu=0.2 seed 84"

## NULL
## [1] "´mu=0.2 seed 85"

## NULL
## [1] "´mu=0.2 seed 86"

## NULL
## [1] "´mu=0.2 seed 87"

## NULL
## [1] "´mu=0.2 seed 88"

## NULL
## [1] "´mu=0.2 seed 89"

## NULL
## [1] "´mu=0.2 seed 90"

## NULL
## [1] "´mu=0.2 seed 91"

## NULL
## [1] "´mu=0.2 seed 92"

## NULL
## [1] "´mu=0.2 seed 93"

## NULL
## [1] "´mu=0.2 seed 94"

## NULL
## [1] "´mu=0.2 seed 95"

## NULL
## [1] "´mu=0.2 seed 96"

## NULL
## [1] "´mu=0.2 seed 97"

## NULL
## [1] "´mu=0.2 seed 98"

## NULL
## [1] "´mu=0.2 seed 99"

## NULL
## [1] "´mu=0.2 seed 100"

## NULL
time = proc.time()
n_sim = 10 #number of simulated trees
n_it = 120 # number of EM iterations
Pars = matrix(nrow=n_sim,ncol=3)
times = vector(mode='numeric',length = n_sim) 
EMs = vector(mode='list',length = n_sim) 
LTTs = vector(mode='list',length = n_sim)
Same_dim = vector(mode='list',length=n_sim)
for(j in 1:n_sim){
  s <- sim_phyl(seed=j,mu0 = 0.2)
  s2 = s$newick.extant.p
  EM = EM_phylo(wt=s2$wt, init_par = c(1.8,0.6,80), n_trees = 10, parallel = F, impsam = F, printpar = F, n_it=n_it)
  stat = vector(mode='numeric',length = n_it) 
  same_dim = vector(mode='numeric',length = n_it) 
  phylo1 = s$newick.extant
  for(i in 1:n_it){
    expe = expectedLTT2(pars=EM$pars[i,],dim = length(s2$wt))
    same_dim[i] = expe$same_dim
    wt = c(expe$bt[1],diff(expe$bt))
    p = list(wt=wt,E=rep(1,length(expe$bt)),n=expe$Ex)
    phylo2 = p2phylo(p)
    ltt = ltt_stat(phylo1,phylo2)
    stat[i] = ltt
  }
  print(proc.time()-time)
  EMs[[j]] = EM$pars
  LTTs[[j]] = stat
  Same_dim[[j]] = same_dim
  EM_sub = EM$pars[same_dim==max(same_dim),]
  stat_sub = stat[same_dim==max(same_dim)]
  choosed_stat = min(stat_sub)
  Pars[j,] = EM$pars[stat==choosed_stat,]
  times[j] = sum(EM$times)
#  print(qplot(1:n_it,stat))
}
##    user  system elapsed 
## 509.792   0.028 509.812 
##     user   system  elapsed 
## 1157.016    0.040 1157.025 
##     user   system  elapsed 
## 2305.996    0.052 2305.991 
##     user   system  elapsed 
## 2857.604    0.056 2857.572 
##     user   system  elapsed 
## 3268.140    0.056 3268.084 
##     user   system  elapsed 
## 4028.268    0.056 4028.177 
##     user   system  elapsed 
## 4725.572    0.056 4725.440 
##     user   system  elapsed 
## 5413.868    0.056 5413.696 
##     user   system  elapsed 
## 5899.936    0.064 5899.753 
##     user   system  elapsed 
## 6639.832    0.072 6639.624
print(proc.time()-time)
##     user   system  elapsed 
## 6639.832    0.072 6639.625
Pars
##            [,1]       [,2]     [,3]
##  [1,] 0.7577978 0.10936400 36.41123
##  [2,] 1.1738909 0.24278529 25.06895
##  [3,] 1.1812332 0.19470719 38.07533
##  [4,] 0.6001767 0.08538488 34.74088
##  [5,] 0.6602777 0.13019993 40.01446
##  [6,] 0.9969060 0.17990443 37.52759
##  [7,] 0.7327009 0.10385624 48.61744
##  [8,] 0.7406627 0.15793522 34.54369
##  [9,] 0.6831924 0.14176165 38.82275
## [10,] 0.8985528 0.14451340 44.17439
summary(Pars)
##        V1               V2                V3       
##  Min.   :0.6002   Min.   :0.08538   Min.   :25.07  
##  1st Qu.:0.6956   1st Qu.:0.11457   1st Qu.:35.16  
##  Median :0.7492   Median :0.14314   Median :37.80  
##  Mean   :0.8425   Mean   :0.14904   Mean   :37.80  
##  3rd Qu.:0.9723   3rd Qu.:0.17441   3rd Qu.:39.72  
##  Max.   :1.1812   Max.   :0.24279   Max.   :48.62
for(j in 1:n_sim){
  EM=EMs[[j]]
  print(paste('´mu=0.2','seed',j))
  df = data.frame(it=1:n_it, EM=EM[,2],ltt=LTTs[[j]],sd=Same_dim[[j]])
  print(EM.plot(df,dim = TRUE))
}
## [1] "´mu=0.2 seed 1"

## NULL
## [1] "´mu=0.2 seed 2"

## NULL
## [1] "´mu=0.2 seed 3"

## NULL
## [1] "´mu=0.2 seed 4"

## NULL
## [1] "´mu=0.2 seed 5"

## NULL
## [1] "´mu=0.2 seed 6"

## NULL
## [1] "´mu=0.2 seed 7"

## NULL
## [1] "´mu=0.2 seed 8"

## NULL
## [1] "´mu=0.2 seed 9"

## NULL
## [1] "´mu=0.2 seed 10"

## NULL
time = proc.time()
n_sim = 100 #number of simulated trees
n_it = 80 # number of EM iterations
Pars = matrix(nrow=n_sim,ncol=3)
times = vector(mode='numeric',length = n_sim) 
EMs = vector(mode='list',length = n_sim) 
LTTs = vector(mode='list',length = n_sim) 
for(j in 1:n_sim){
  s <- sim_phyl(seed=j,mu0 = 0.4)
  s2 = s$newick.extant.p
  EM = EM_phylo(wt=s2$wt, init_par = c(1.8,0.6,80), n_trees = 10, parallel = F, impsam = F, printpar = F, n_it=n_it)
  stat = vector(mode='numeric',length = n_it) 
  phylo1 = s$newick.extant
  for(i in 1:n_it){
    expe = expectedLTT2(pars=EM$pars[i,])
    wt = c(expe$bt[1],diff(expe$bt))
    p = list(wt=wt,E=rep(1,length(expe$bt)),n=expe$Ex)
    phylo2 = p2phylo(p)
    ltt = ltt_stat(phylo1,phylo2)
    stat[i] = ltt
  }
  EMs[[j]] = EM$pars
  LTTs[[j]] = stat
  Pars[j,] = EM$pars[stat==min(stat),]
  times[j] = sum(EM$times)
#  print(qplot(1:n_it,stat))
}
print(proc.time()-time)
##      user    system   elapsed 
## 33702.216     0.304 33701.852
summary(Pars)
##        V1               V2                V3           
##  Min.   :0.2613   Min.   :0.07469   Min.   :1.700e+01  
##  1st Qu.:0.6259   1st Qu.:0.16789   1st Qu.:3.200e+01  
##  Median :0.8254   Median :0.23070   Median :3.700e+01  
##  Mean   :0.7996   Mean   :0.26859   Mean   :2.617e+10  
##  3rd Qu.:0.9467   3rd Qu.:0.29555   3rd Qu.:4.300e+01  
##  Max.   :1.2776   Max.   :0.72551   Max.   :2.617e+12
summary(times)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19.53   27.92   32.03   32.04   36.20   46.89
for(j in 1:n_sim){
  EM=EMs[[j]]
  print(paste('´mu=0.4','seed',j))
  df = data.frame(it=1:n_it, EM=EM[,2],ltt=LTTs[[j]])
  print(EM.plot(df))
  #p = ggplot(data=df,aes(x=it,y=EM,color=ltt))+geom_point()
  #print(p)
}
## [1] "´mu=0.4 seed 1"

## NULL
## [1] "´mu=0.4 seed 2"

## NULL
## [1] "´mu=0.4 seed 3"

## NULL
## [1] "´mu=0.4 seed 4"

## NULL
## [1] "´mu=0.4 seed 5"

## NULL
## [1] "´mu=0.4 seed 6"

## NULL
## [1] "´mu=0.4 seed 7"

## NULL
## [1] "´mu=0.4 seed 8"

## NULL
## [1] "´mu=0.4 seed 9"

## NULL
## [1] "´mu=0.4 seed 10"

## NULL
## [1] "´mu=0.4 seed 11"

## NULL
## [1] "´mu=0.4 seed 12"

## NULL
## [1] "´mu=0.4 seed 13"

## NULL
## [1] "´mu=0.4 seed 14"

## NULL
## [1] "´mu=0.4 seed 15"

## NULL
## [1] "´mu=0.4 seed 16"

## NULL
## [1] "´mu=0.4 seed 17"

## NULL
## [1] "´mu=0.4 seed 18"

## NULL
## [1] "´mu=0.4 seed 19"

## NULL
## [1] "´mu=0.4 seed 20"

## NULL
## [1] "´mu=0.4 seed 21"

## NULL
## [1] "´mu=0.4 seed 22"

## NULL
## [1] "´mu=0.4 seed 23"

## NULL
## [1] "´mu=0.4 seed 24"

## NULL
## [1] "´mu=0.4 seed 25"

## NULL
## [1] "´mu=0.4 seed 26"

## NULL
## [1] "´mu=0.4 seed 27"

## NULL
## [1] "´mu=0.4 seed 28"

## NULL
## [1] "´mu=0.4 seed 29"

## NULL
## [1] "´mu=0.4 seed 30"

## NULL
## [1] "´mu=0.4 seed 31"

## NULL
## [1] "´mu=0.4 seed 32"

## NULL
## [1] "´mu=0.4 seed 33"

## NULL
## [1] "´mu=0.4 seed 34"

## NULL
## [1] "´mu=0.4 seed 35"

## NULL
## [1] "´mu=0.4 seed 36"

## NULL
## [1] "´mu=0.4 seed 37"

## NULL
## [1] "´mu=0.4 seed 38"

## NULL
## [1] "´mu=0.4 seed 39"

## NULL
## [1] "´mu=0.4 seed 40"

## NULL
## [1] "´mu=0.4 seed 41"

## NULL
## [1] "´mu=0.4 seed 42"

## NULL
## [1] "´mu=0.4 seed 43"

## NULL
## [1] "´mu=0.4 seed 44"

## NULL
## [1] "´mu=0.4 seed 45"

## NULL
## [1] "´mu=0.4 seed 46"

## NULL
## [1] "´mu=0.4 seed 47"

## NULL
## [1] "´mu=0.4 seed 48"

## NULL
## [1] "´mu=0.4 seed 49"

## NULL
## [1] "´mu=0.4 seed 50"

## NULL
## [1] "´mu=0.4 seed 51"

## NULL
## [1] "´mu=0.4 seed 52"

## NULL
## [1] "´mu=0.4 seed 53"

## NULL
## [1] "´mu=0.4 seed 54"

## NULL
## [1] "´mu=0.4 seed 55"

## NULL
## [1] "´mu=0.4 seed 56"

## NULL
## [1] "´mu=0.4 seed 57"

## NULL
## [1] "´mu=0.4 seed 58"

## NULL
## [1] "´mu=0.4 seed 59"

## NULL
## [1] "´mu=0.4 seed 60"

## NULL
## [1] "´mu=0.4 seed 61"

## NULL
## [1] "´mu=0.4 seed 62"

## NULL
## [1] "´mu=0.4 seed 63"

## NULL
## [1] "´mu=0.4 seed 64"

## NULL
## [1] "´mu=0.4 seed 65"

## NULL
## [1] "´mu=0.4 seed 66"

## NULL
## [1] "´mu=0.4 seed 67"

## NULL
## [1] "´mu=0.4 seed 68"

## NULL
## [1] "´mu=0.4 seed 69"

## NULL
## [1] "´mu=0.4 seed 70"

## NULL
## [1] "´mu=0.4 seed 71"

## NULL
## [1] "´mu=0.4 seed 72"

## NULL
## [1] "´mu=0.4 seed 73"

## NULL
## [1] "´mu=0.4 seed 74"

## NULL
## [1] "´mu=0.4 seed 75"

## NULL
## [1] "´mu=0.4 seed 76"

## NULL
## [1] "´mu=0.4 seed 77"

## NULL
## [1] "´mu=0.4 seed 78"

## NULL
## [1] "´mu=0.4 seed 79"

## NULL
## [1] "´mu=0.4 seed 80"

## NULL
## [1] "´mu=0.4 seed 81"

## NULL
## [1] "´mu=0.4 seed 82"

## NULL
## [1] "´mu=0.4 seed 83"

## NULL
## [1] "´mu=0.4 seed 84"

## NULL
## [1] "´mu=0.4 seed 85"

## NULL
## [1] "´mu=0.4 seed 86"

## NULL
## [1] "´mu=0.4 seed 87"

## NULL
## [1] "´mu=0.4 seed 88"

## NULL
## [1] "´mu=0.4 seed 89"

## NULL
## [1] "´mu=0.4 seed 90"

## NULL
## [1] "´mu=0.4 seed 91"

## NULL
## [1] "´mu=0.4 seed 92"

## NULL
## [1] "´mu=0.4 seed 93"

## NULL
## [1] "´mu=0.4 seed 94"

## NULL
## [1] "´mu=0.4 seed 95"

## NULL
## [1] "´mu=0.4 seed 96"

## NULL
## [1] "´mu=0.4 seed 97"

## NULL
## [1] "´mu=0.4 seed 98"

## NULL
## [1] "´mu=0.4 seed 99"

## NULL
## [1] "´mu=0.4 seed 100"

## NULL
time = proc.time()
n_sim = 10 #number of simulated trees
n_it = 150 # number of EM iterations
Pars = matrix(nrow=n_sim,ncol=3)
times = vector(mode='numeric',length = n_sim) 
EMs = vector(mode='list',length = n_sim) 
LTTs = vector(mode='list',length = n_sim) 
for(j in 1:n_sim){
  s <- sim_phyl(seed=j,mu0 = 0.4)
  s2 = s$newick.extant.p
  EM = EM_phylo(wt=s2$wt, init_par = c(1.8,0.6,80), n_trees = 10, parallel = F, impsam = F, printpar = F, n_it=n_it)
  stat = vector(mode='numeric',length = n_it) 
  phylo1 = s$newick.extant
  for(i in 1:n_it){
    expe = expectedLTT2(pars=EM$pars[i,])
    wt = c(expe$bt[1],diff(expe$bt))
    p = list(wt=wt,E=rep(1,length(expe$bt)),n=expe$Ex)
    phylo2 = p2phylo(p)
    ltt = ltt_stat(phylo1,phylo2)
    stat[i] = ltt
  }
  EMs[[j]] = EM$pars
  LTTs[[j]] = stat
  Pars[j,] = EM$pars[stat==min(stat),]
  times[j] = sum(EM$times)
#  print(qplot(1:n_it,stat))
}
print(proc.time()-time)
##     user   system  elapsed 
## 4446.292    0.096 4446.285
summary(Pars)
##        V1               V2                V3        
##  Min.   :0.3160   Min.   :0.02674   Min.   : 22.87  
##  1st Qu.:0.5158   1st Qu.:0.12257   1st Qu.: 32.07  
##  Median :0.6373   Median :0.16354   Median : 38.24  
##  Mean   :0.7000   Mean   :0.27617   Mean   : 43.90  
##  3rd Qu.:0.8419   3rd Qu.:0.44239   3rd Qu.: 46.57  
##  Max.   :1.2127   Max.   :0.70676   Max.   :105.13
summary(times)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   32.00   39.99   52.31   50.24   55.40   77.03
for(j in 1:n_sim){
  EM=EMs[[j]]
  print(paste('mu=0.4','seed',j))
  df = data.frame(it=1:n_it, EM=EM[,2],ltt=LTTs[[j]])
  print(EM.plot(df))
  #p = ggplot(data=df,aes(x=it,y=EM,color=ltt))+geom_point()
  #print(p)
}
## [1] "mu=0.4 seed 1"

## NULL
## [1] "mu=0.4 seed 2"

## NULL
## [1] "mu=0.4 seed 3"

## NULL
## [1] "mu=0.4 seed 4"

## NULL
## [1] "mu=0.4 seed 5"

## NULL
## [1] "mu=0.4 seed 6"

## NULL
## [1] "mu=0.4 seed 7"

## NULL
## [1] "mu=0.4 seed 8"

## NULL
## [1] "mu=0.4 seed 9"

## NULL
## [1] "mu=0.4 seed 10"

## NULL
time = proc.time()
n_sim = 10 #number of simulated trees
n_it = 150 # number of EM iterations
Pars = matrix(nrow=n_sim,ncol=3)
times = vector(mode='numeric',length = n_sim) 
EMs = vector(mode='list',length = n_sim) 
LTTs = vector(mode='list',length = n_sim) 
for(j in 1:n_sim){
  s <- sim_phyl(seed=j,mu0 = 0.4)
  s2 = s$newick.extant.p
  EM = EM_phylo(wt=s2$wt, init_par = c(1.8,0.6,80), n_trees = 10, parallel = F, impsam = F, printpar = F, n_it=n_it)
  stat = vector(mode='numeric',length = n_it) 
  phylo1 = s$newick.extant
  print(paste('iteration',j))
  for(i in 1:n_it){
    expe = expectedLTT2(pars=EM$pars[i,],median=TRUE)
    wt = c(expe$bt[1],diff(expe$bt))
    p = list(wt=wt,E=rep(1,length(expe$bt)),n=expe$Ex)
    phylo2 = p2phylo(p)
    ltt = ltt_stat(phylo1,phylo2)
    stat[i] = ltt
  }
  EMs[[j]] = EM$pars
  LTTs[[j]] = stat
  Pars[j,] = EM$pars[stat==min(stat),]
  times[j] = sum(EM$times)
#  print(qplot(1:n_it,stat))
}
## [1] "iteration 1"
## [1] "iteration 2"
## [1] "iteration 3"
## [1] "iteration 4"
## [1] "iteration 5"
## [1] "iteration 6"
## [1] "iteration 7"
## [1] "iteration 8"
## [1] "iteration 9"
## [1] "iteration 10"
print(proc.time()-time)
##     user   system  elapsed 
## 4536.468    0.108 4536.534
summary(Pars)
##        V1               V2                V3           
##  Min.   :0.2179   Min.   :0.01218   Min.   :1.500e+01  
##  1st Qu.:0.2918   1st Qu.:0.04013   1st Qu.:3.500e+01  
##  Median :0.3807   Median :0.10443   Median :5.400e+01  
##  Mean   :0.5348   Mean   :0.14834   Mean   :1.422e+13  
##  3rd Qu.:0.7283   3rd Qu.:0.20417   3rd Qu.:7.300e+01  
##  Max.   :1.2134   Max.   :0.41519   Max.   :1.422e+14
summary(times)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   31.77   39.95   54.79   51.09   56.50   77.92
for(j in 1:n_sim){
  EM=EMs[[j]]
  df = data.frame(n=1:n_it, EM=EM[,2],ltt=LTTs[[j]])
  p = ggplot(data=df,aes(x=n,y=EM,color=ltt))+geom_point()
  q = ggplot(data=df,aes(x=n,y=ltt,color=EM))+geom_point()
  print(paste('seed',j))
  print(p)
  print(q)
}
## [1] "seed 1"

## [1] "seed 2"

## [1] "seed 3"

## [1] "seed 4"

## [1] "seed 5"

## [1] "seed 6"

## [1] "seed 7"

## [1] "seed 8"

## [1] "seed 9"

## [1] "seed 10"


Simulations March 2017

Given a reconstructed phylogenetic tree

phylo <- sim_phyl(seed=3)$newick.extant

The new proposed method corresponds to the following steps

  1. Set initial parameters for \(\lambda_0\) and \(K\)
pars = c(2,80)
  1. Given the parameters, find \(\mu^*\) such that minimizes ltt_stat
mu=0.2
# this does not work at all:
#pars = subplex(par = mu, fn = ltt_mu, phylo=phylo, prior_pars = pars)$par
#let´s visualize the ltt_mu curve
pp = proc.time()
mu = seq(0.01, 0.8, by=0.03)
stat=NULL
for(i in 1:length(mu)){
  stat[i] = ltt_mu(mu = mu[i], phylo = phylo, prior_pars = pars)
}
## [1] "DIFFERENT LENGHT IN PHYLOGENIES!"
print(pp - proc.time())
##     user   system  elapsed 
## -392.140   -0.136 -392.470

and

qplot(mu,stat)

nmu = mean(mu[stat < quantile(stat,0.05)])
nmu
## [1] 0.325
  1. Once we have the new \(\mu\) we can update \(\lambda\) and \(K\) fixing \(\mu\).

HERE SHOULD BE a 2-parameter version of the EM

  1. Once we have the new \(\lambda\) and \(K\) we go to step 2 to re-calculate \(\mu\)

  2. We repeat this algorithm until convergence.